/*
 * TopicLearningGibbs.h
 *
 *  Created on: Apr 4, 2011
 *      Author: Clint P. George
 * Description: This class implements the LDA Product
 * 				Partition Model Gibbs sampler with
 * 				topic learning
 */

#ifndef TOPICLEARNINGGIBBS_H_
#define TOPICLEARNINGGIBBS_H_

#include <string>
#include <fstream>
#include <assert.h>
#include <armadillo>
#include <iostream>

#include "LDAPPMBase.h"

using namespace std;
using namespace arma;

/*
 * Different topic learning methods
 */
enum LearningMethod {
	Regular = 0,			// tag used on the TopicLearningGibbs constructor that takes a single alpha
	Stick_Breaking = 1		// tag used on the TopicLearningGibbs constructor that takes sb_a and sb_b
};

/*
 * Gibbs sampler for the LDA product partition model with topic learning
 * (see the file header). Publicly derives from LDAPPMBase.
 *
 * Everything in this class is only declared here; the member function
 * definitions live outside this header.
 *
 * Two constructors are provided, tagged "Regular" and "Stick breaking".
 * They differ only in taking a single alpha versus the pair (sb_a, sb_b).
 * NOTE(review): presumably each one sets learning_method_ to the matching
 * LearningMethod value -- confirm in the implementation file.
 */
class TopicLearningGibbs: public LDAPPMBase {

private:

	// NOTE(review): presumably the scalar alpha handed to the "Regular"
	// constructor -- confirm in the implementation file.
	double alpha_;
	umat z_bp_; 								// for burn in period (matrix-valued, unlike z_)
	uvec z_; 									// for word topic assignments
	// NOTE(review): the comment below is identical to the one on z_bp_; the
	// name suggests a mode of the topic assignments -- confirm.
	uvec z_mode_; 								// for burn in period
	uvec topic_word_counts_; 					// number of topic occurrences (in the corpus)
	// Signed integer vector. NOTE(review): its role is not visible from this
	// header -- confirm against the implementation.
	ivec topic_indices_;

	// Count and sample matrices for theta and beta.
	// NOTE(review): dimensions and orientation are not visible here; in LDA
	// theta is usually per-document and beta per-topic -- TODO confirm.
	mat theta_counts_;
	mat beta_counts_;
	mat theta_sample_;
	mat beta_sample_;
	mat theta_sample_bp_; 						// for burn in period
	mat beta_sample_bp_; 						// for burn in period

	// Private initialisers for theta, beta and z (no arguments, no result).
	// NOTE(review): presumably invoked during construction or at the start of
	// run_gibbs() -- confirm.
	void init_theta ();
	void init_beta ();
	void init_z ();

	// Returns a vec computed from a topic count and a list of ids.
	// idx_z_ids is taken by value.
	// NOTE(review): presumably the ids index into z_ -- confirm.
	vec calc_partition_counts (size_t num_topics, vector<size_t> idx_z_ids);

public:

	// The fields below are public data members, so callers can read and
	// write them directly.
	size_t burn_in_period_;
	size_t num_learned_topics_;
	size_t topic_upper_bound_;
	double sb_a_; 								// hyper parameter for stick breaking
	double sb_b_; 								// hyper parameter for stick breaking
	double eta_;
	vec alpha_vec_;
	enum LearningMethod learning_method_;		// one of Regular / Stick_Breaking

	/*
	 * "Regular" constructor: takes a single alpha.
	 *
	 * num_topics        - number of topics
	 * max_iterations    - iteration limit
	 * burn_in_period    - burn-in length
	 * alpha, eta        - hyperparameters
	 * data_file         - data file (string, taken by value)
	 * vocab_file        - vocabulary file (string, taken by value)
	 * topic_upper_bound - upper bound on topics
	 *
	 * NOTE(review): units, valid ranges and the file formats are not
	 * visible from this header -- confirm in the implementation.
	 */
	TopicLearningGibbs(size_t num_topics, 		// Regular
			size_t max_iterations,
			size_t burn_in_period,
			double alpha,
			double eta,
			string data_file,
			string vocab_file,
			size_t topic_upper_bound);
	/*
	 * "Stick breaking" constructor: same parameters as the one above,
	 * except that alpha is replaced by the two stick-breaking
	 * hyperparameters sb_a and sb_b.
	 */
	TopicLearningGibbs(size_t num_topics, 		// Stick breaking
			size_t max_iterations,
			size_t burn_in_period,
			double sb_a,
			double sb_b,
			double eta,
			string data_file,
			string vocab_file,
			size_t topic_upper_bound);
	// Virtual destructor.
	virtual ~TopicLearningGibbs();
	// Runs the Gibbs sampler. Takes no arguments and returns nothing.
	void run_gibbs();
	// Saves sampler state under the given name.
	// NOTE(review): which members are written, and where, is not visible
	// here -- confirm in the implementation.
	void save_state(string state_name);

	// Returns the model perplexity as a double.
	double calc_model_perplexity();
	// Returns the log partition probability as a long double.
	// The spelling "probality" (sic) is part of the public name; renaming
	// it would break existing callers.
	long double calc_log_partition_probality();

};

#endif /* TOPICLEARNINGGIBBS_H_ */
